## A file to produce some basic descriptions of the data used as numbers within
## the text. We run it with R BATCH and then copy numbers into the main body.tex document
## that document has the raw output from supp_desc_new.Rout as comments for the paragraphs where we use
## the numbers so that we can see where those numbers came from.

library(here)
library(dplyr)
library(ggplot2)

load(here("Data", "big_wrkdat_thin.rda"), verbose = TRUE)
## Total number of respondents
nrow(big_wrkdat_thin)
# [1] 7811

table(big_wrkdat_thin$white)
#
#    0    1
#  456 7355

## Total number of DAs
length(unique(big_wrkdat_thin$dauid))
# [1] 6369

## People excluded for not providing any perception:
table(big_wrkdat_thin$allperceptionsmissing)
#
# FALSE  TRUE
#  7570   241

## Only people who saw their DA
load(here("Data", "wrkdat_DA0_new.rda"), verbose = TRUE)

## Their perceptions of their DA when shown their DA polygon
summary(wrkdat_DA0_new$vm)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#  0.0400  0.1600  0.2800  0.3723  0.4900  4.6400     427
quantile(wrkdat_DA0_new$vm, c(.05, .5, .95), na.rm = TRUE)
#    5%   50%   95%
# 0.060 0.280 0.893
### The numbers when forced to go from 0 to 1.
summary(wrkdat_DA0_new$vm.norm2)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#  0.0400  0.1600  0.2800  0.3563  0.4900  1.0000     427
quantile(wrkdat_DA0_new$vm.norm2, c(.05, .5, .95), na.rm = TRUE)
#    5%   50%   95%
# 0.060 0.280 0.893

## Census
summary(wrkdat_DA0_new$da_prop_vm_20pct_06)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.00000 0.01942 0.06828 0.13518 0.18432 0.96321
quantile(wrkdat_DA0_new$da_prop_vm_20pct_06, c(.05, .5, .95))
#         5%        50%        95%
# 0.00000000 0.06827894 0.50640449

wrkdat_DA0_new$vm_over_da_prop_vm_20pct_06 <- with(wrkdat_DA0_new, vm.norm2 / da_prop_vm_20pct_06)
wrkdat_DA0_new$vm_diff <- with(wrkdat_DA0_new, vm.norm2 - da_prop_vm_20pct_06)
summary(with(wrkdat_DA0_new[wrkdat_DA0_new$da_prop_vm_20pct_06 > 0, ], vm / da_prop_vm_20pct_06))
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#  0.2135  1.3020  2.3567  4.1220  4.5825 59.2250     309
quantile(with(wrkdat_DA0_new[wrkdat_DA0_new$da_prop_vm_20pct_06 > 0, ], vm / da_prop_vm_20pct_06), c(.1, .5, .9), na.rm = TRUE)
#       10%       50%       90%
# 0.8962499 2.3566667 8.6063636

load(here("Data", "wrkdatOwnMap_new.rda"), verbose = TRUE)
summary(wrkdatOwnMap_new$vm.community.subj)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.0000  0.1225  0.2900  0.3647  0.5175  4.2700
summary(wrkdatOwnMap_new$vm.community.norm2)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.0000  0.1225  0.2900  0.3506  0.5175  1.0000

## Our best guess of the census proportion in the map.
summary(wrkdatOwnMap_new$vm.community.obj)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
# 0.00000 0.03042 0.09293 0.14468 0.21029 0.94224      11
summary(wrkdatOwnMap_new$prop_vmpop_map_avg)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#  0.0000  0.0300  0.0960  0.1401  0.2077  0.8569    5101
summary(wrkdatOwnMap_new$prop_vmpop_map_dawt)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#  0.0000  0.0227  0.0782  0.1245  0.1800  0.7938    5099

## Comparing the different approaches to weighting underlying Ceneus DAs when
## measuring "objective context" in the subjective maps.
cor(wrkdatOwnMap_new$prop_vmpop_map_avg, wrkdatOwnMap_new$prop_vmpop_map_dawt,
    use = "complete.obs"
)
# [1] 0.9405247


################
## The anyDA design:
load(here("Data", "wrkdatOwnMap_anyDA_new.rda"), verbose = TRUE)
load(here("Design", "matches_anyDA_new.rda"), verbose = TRUE)

wdat0 <- wrkdatOwnMap_new %>%
    filter(!is.na(vm_change) & !is.na(social.capital01) & !is.na(community.resp01)) %>%
    droplevels()

wdat0 <- left_join(wdat0, matches_anyDA_new)

## How many people did we exclude from the matching:
table(is.na(wdat0$pair))
## Record number of pairs, number of DAs.
stopifnot(unique(table(wdat0$pair)) == 2)

num_pairs <- sum(!is.na(wdat0$pair)) / 2
num_das <- length(unique(wdat0$dauid[!is.na(wdat0$pair)]))

## Total number of valid respondents in the anyDA design
nrow(wdat0)
# [1] 6386
## Total number of respondents used in the matching after exclusions
nrow(wdat0[!is.na(wdat0$pair), ])
# [1] 3772
## Number of pairs
num_pairs
# [1] 1886
## Number of DAs
num_das
# [1] 3098

## How many people did we exclude either for not providing any perceptions or
## not drawing a map?
nrow(big_wrkdat_thin)
nrow(wdat0)
nrow(big_wrkdat_thin) - nrow(wdat0)

## How many DAs shared across pairs?
num_das <- with(wdat0[!is.na(wdat0$pair), ], table(dauid))
summary(num_das)
# Number of cases in table: 3772
# Number of factors: 1
table(num_das)
# num_das
#    1    2    3    4    5    6    9
# 2533  496   47   12    5    4    1
table(num_das > 1)
#
# FALSE  TRUE
#  2533   565
table(num_das > 3)
#
# FALSE  TRUE
#  3076    22

## Summarize main analysis
load(here("Analysis", "analysis_anyDA_new.rda"), verbose = TRUE)

res
#                        Estimate Std. Error         ci1         ci2
# Social Cohesion     -0.06553736 0.01053103 -0.08617576 -0.04490193
# Collective Efficacy -0.05057089 0.01397528 -0.07797394 -0.02318029

## To help interpret the results of the analysis
summary(wdat0[!is.na(wdat0$pair), c("social.capital01", "community.resp01", "vm.community.norm2")])
#  social.capital01 community.resp01 vm.community.norm2
#  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000
#  1st Qu.:0.5833   1st Qu.:0.6250   1st Qu.:0.0900
#  Median :0.7500   Median :0.7500   Median :0.2300
#  Mean   :0.7041   Mean   :0.7412   Mean   :0.2994
#  3rd Qu.:0.8333   3rd Qu.:0.8750   3rd Qu.:0.4500
#  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000
sapply(wdat0[!is.na(wdat0$pair), c("social.capital01", "community.resp01", "vm.community.norm2")], sd, na.rm = TRUE)
#   social.capital01   community.resp01 vm.community.norm2
#          0.1493546          0.1974179          0.2561378
sapply(wdat0[!is.na(wdat0$pair), c("social.capital01", "community.resp01", "vm.community.norm2")], function(x) {
    return(quantile(x, seq(0, 1, .1), na.rm = TRUE))
})
#     social.capital01 community.resp01 vm.community.norm2
# 0%          0.0000000            0.000               0.00
# 10%         0.5000000            0.500               0.03
# 20%         0.5833333            0.625               0.07
# 30%         0.6666667            0.625               0.11
# 40%         0.6666667            0.750               0.17
# 50%         0.7500000            0.750               0.23
# 60%         0.7500000            0.875               0.31
# 70%         0.7500000            0.875               0.39
# 80%         0.8333333            0.875               0.50
# 90%         0.9166667            1.000               0.68
# 100%        1.0000000            1.000               1.00


pair_diffs_abs <- wdat0 %>%
    filter(!is.na(pair)) %>%
    group_by(pair) %>%
    summarize(
        perc_diffs = abs(diff(vm.community.norm2)),
        cohesion_diffs = abs(diff(social.capital01)),
        efficacy_diffs = abs(diff(community.resp01))
    )

summary(pair_diffs_abs)
#       pair          perc_diffs     cohesion_diffs
#  Min.   :   1.0   Min.   :0.0000   Min.   :0.00000
#  1st Qu.: 472.2   1st Qu.:0.0700   1st Qu.:0.08333
#  Median : 943.5   Median :0.1600   Median :0.16667
#  Mean   : 943.5   Mean   :0.2239   Mean   :0.15893
#  3rd Qu.:1414.8   3rd Qu.:0.3200   3rd Qu.:0.25000
#  Max.   :1886.0   Max.   :1.0000   Max.   :0.91667
#  efficacy_diffs
#  Min.   :0.0000
#  1st Qu.:0.1250
#  Median :0.1250
#  Mean   :0.2126
#  3rd Qu.:0.2500
#  Max.   :1.0000
summary(pair_diffs_abs$perc_diffs)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  0.0000  0.0700  0.1600  0.2239  0.3200  1.0000
sapply(pair_diffs_abs[, -1], sd, na.rm = TRUE)
#     perc_diffs cohesion_diffs efficacy_diffs
#      0.2001013      0.1345321      0.1736004
sapply(pair_diffs_abs[, -1], function(x) {
    return(quantile(x, seq(0, 1, .1), na.rm = TRUE))
})
#      perc_diffs cohesion_diffs efficacy_diffs
# 0%        0.000     0.00000000          0.000
# 10%       0.030     0.00000000          0.000
# 20%       0.060     0.08333333          0.000
# 30%       0.080     0.08333333          0.125
# 40%       0.120     0.08333333          0.125
# 50%       0.160     0.16666667          0.125
# 60%       0.210     0.16666667          0.250
# 70%       0.280     0.16666667          0.250
# 80%       0.370     0.25000000          0.375
# 90%       0.535     0.33333333          0.500
# 100%      1.000     0.91666667          1.000


## Also looking at how education and income differ within
## pair as a part of our exploration of alternative explanations

wdat0$BAplus <- as.numeric(wdat0$educnew %in%
    c("bachelor's degree", "master's degree", "professional degree or doctorate"))

pair_diffs <- wdat0 %>%
    filter(!is.na(pair)) %>%
    group_by(pair) %>%
    mutate(rank_perc = rank(vm.community.norm2)) %>%
    filter(rank_perc != .5) %>% # exclude pairs with the same perceptions
    reframe(
        educ_diff = BAplus[rank_perc == 2] - BAplus[rank_perc == 1],
        income_diff = income.coded[rank_perc == 2] - income.coded[rank_perc == 1],
        cohesion_diff = social.capital01[rank_perc == 2] - social.capital01[rank_perc == 1],
        efficacy_diff = community.resp01[rank_perc == 2] - community.resp01[rank_perc == 1],
        educ_abs_diff = abs(diff(BAplus)),
        inc_abs_diff = abs(diff(income.coded))
    ) %>%
    ungroup()

pair_diffs
# # A tibble: 1,882 × 7
#     pair educ_diff income_diff cohesion_diff efficacy_diff educ_abs_diff inc_abs_diff
#    <int>     <dbl>       <dbl>         <dbl>         <dbl>         <dbl>        <dbl>
#  1     1         1           4       -0.0833         0.625             1            4
#  2     2        -1          NA        0.25           0.5               1           NA
#  3     3         0          -5       -0.5            0.125             0            5
#  4     4        -1           4        0.333         -0.125             1            4
#  5     5         0           6        0.0833         0.5               0            6
#  6     6         0          -5        0              0.25              0            5
#  7     7         0          -4        0.0833         0.375             0            4
#  8     8         0           1       -0.167          0                 0            1
#  9     9         1          -7        0             -0.5               1            7
# 10    10         1          NA        0.0833        -0.125             1           NA
# # ℹ 1,872 more rows
# # ℹ Use `print(n = ...)` to see more rows

summary(pair_diffs)
#       pair          educ_diff         income_diff       cohesion_diff      efficacy_diff
#  Min.   :   1.0   Min.   :-1.00000   Min.   :-11.0000   Min.   :-0.91667   Min.   :-0.875000
#  1st Qu.: 474.2   1st Qu.:-1.00000   1st Qu.: -4.0000   1st Qu.:-0.16667   1st Qu.:-0.250000
#  Median : 945.5   Median : 0.00000   Median :  0.0000   Median : 0.00000   Median : 0.000000
#  Mean   : 944.7   Mean   :-0.02444   Mean   : -0.1906   Mean   :-0.01443   Mean   :-0.009299
#  3rd Qu.:1415.8   3rd Qu.: 0.00000   3rd Qu.:  3.0000   3rd Qu.: 0.08333   3rd Qu.: 0.125000
#  Max.   :1886.0   Max.   : 1.00000   Max.   : 11.0000   Max.   : 0.91667   Max.   : 1.000000
#                                      NA's   :471
#  educ_abs_diff     inc_abs_diff
#  Min.   :0.0000   Min.   : 0.000
#  1st Qu.:0.0000   1st Qu.: 1.000
#  Median :0.0000   Median : 4.000
#  Mean   :0.4803   Mean   : 3.979
#  3rd Qu.:1.0000   3rd Qu.: 6.000
#  Max.   :1.0000   Max.   :11.000
#                   NA's   :471

sapply(pair_diffs[, -1], function(x) {
    return(quantile(x, seq(0, 1, .1), na.rm = TRUE))
})
#      educ_diff income_diff cohesion_diff efficacy_diff educ_abs_diff inc_abs_diff
# 0%          -1         -11   -0.91666667        -0.875             0            0
# 10%         -1          -7   -0.25000000        -0.375             0            0
# 20%         -1          -5   -0.16666667        -0.250             0            1
# 30%          0          -3   -0.08333333        -0.125             0            2
# 40%          0          -1   -0.08333333        -0.125             0            3
# 50%          0           0    0.00000000         0.000             0            4
# 60%          0           1    0.00000000         0.000             1            4
# 70%          0           2    0.08333333         0.125             1            6
# 80%          1           4    0.16666667         0.250             1            7
# 90%          1           6    0.25000000         0.375             1            8
# 100%         1          11    0.91666667         1.000             1           11

mean(pair_diffs$educ_abs_diff == 0)
# [1] 0.5196599

educ_diffs <- table(pair_diffs$educ_diff, exclude = c())
educ_diffs
#
#  -1   0   1
# 475 978 429
educ_diffs / sum(educ_diffs)
#
#        -1         0         1
# 0.2523911 0.5196599 0.2279490

income_diffs <- table(pair_diffs$income_diff, exclude = c())
income_diffs
#
#  -11  -10   -9   -8   -7   -6   -5   -4   -3   -2   -1    0    1    2    3    4    5    6    7    8    9
#   23   16   40   36   60   57   55   89   67   91  105  160   97   94   81   73   68   61   46   35   34
#   10   11 <NA>
#   14    9  471
income_diffs / sum(income_diffs)
#
#         -11         -10          -9          -8          -7          -6          -5          -4          -3
# 0.012221041 0.008501594 0.021253985 0.019128587 0.031880978 0.030286929 0.029224230 0.047290117 0.035600425
#          -2          -1           0           1           2           3           4           5           6
# 0.048352816 0.055791711 0.085015940 0.051540914 0.049946865 0.043039320 0.038788523 0.036131775 0.032412327
#           7           8           9          10          11        <NA>
# 0.024442083 0.018597237 0.018065887 0.007438895 0.004782147 0.250265675

pair_diffs$income_diff_simp <- sign(pair_diffs$income_diff)
income_diffs_simp <- table(pair_diffs$income_diff_simp)
income_diffs_simp
#
#  -1   0   1
# 639 160 612
income_diffs_simp / sum(income_diffs_simp)
#
#        -1         0         1
# 0.4528703 0.1133948 0.4337349

## What proportion of those who perceive more within pair show high cohesion
## within pair?
cohesion_diffs <- table(pair_diffs$cohesion_diff, exclude = c())
cohesion_diffs
cohesion_diffs / sum(cohesion_diffs)

cohesion_diffs_simp <- table(sign(pair_diffs$cohesion_diff))
cohesion_diffs_simp
cohesion_diffs_simp / sum(cohesion_diffs_simp)

## What proportion of those who perceive more within pair show high community
## efficacy within pair?

efficacy_diffs <- table(pair_diffs$efficacy_diff, exclude = c())
efficacy_diffs
efficacy_diffs / sum(efficacy_diffs)

efficacy_diffs_simp <- table(sign(pair_diffs$efficacy_diff))
efficacy_diffs_simp
efficacy_diffs_simp / sum(efficacy_diffs_simp)


wdat0 <- wdat0 %>%
    group_by(pair) %>%
    mutate(
        educ_rank = ifelse(!is.na(pair), rank(BAplus) - 1, NA),
        income_rank = ifelse(!is.na(pair), rank(income.coded) - 1, NA),
        subj_rank = ifelse(!is.na(pair), rank(vm.community.obj) - 1, NA)
    ) %>%
    ungroup()

table(wdat0$educ_rank)
table(wdat0$educ_rank == .5)
mean(wdat0$educ_rank == .5, na.rm = TRUE)

with(wdat0[!is.na(wdat0$pair), ], table(educ_rank, subj_rank, exclude = c()))

## Relative over estimation within pair

wdat0 <- wdat0 %>%
    filter(!is.na(pair)) %>%
    group_by(pair) %>%
    mutate(
        more_diverse_pseudoenv = rank(vm.community.subj),
        perc_bias = vm.community.obj - vm.community.subj
    ) %>%
    ungroup()

table(wdat0$more_diverse_pseudoenv)

wdat0 %>%
    group_by(more_diverse_pseudoenv) %>%
    summarize(mean(perc_bias, na.rm = TRUE), mean(vm.community.subj), n())

## A tibble: 2 × 4
#  more_diverse_pseudoenv `mean(perc_bias, na.rm = TRUE)` `mean(vm.community.subj)` `n()`
#                   <dbl>                           <dbl>                     <dbl> <int>
# 1                      1                         -0.0954                     0.188  1886
# 2                      2                         -0.309                      0.430  1886
